:l Plotting.hs
:opt no-lint
-- all Haskell programs are mathematical expressions
True
True
-- all expressions are statically typed
:t True
:t False
-- functions have types. functions are pure
:t not
-- function application
not True
:t (not True)
False
seven = 7 :: Int
not seven
<interactive>:1:5: error:
• Couldn't match expected type ‘Bool’ with actual type ‘Int’
• In the first argument of ‘not’, namely ‘seven’
In the expression: not seven
In an equation for ‘it’: it = not seven
-- types can take types as arguments
:t [True, False, True]
:t [seven]
-- universal quantification
id True
:t id
True
-- map applies a function to each element in a list
map not [True, False]
:t map not [True, False]
[False,True]
-- map takes a function as its argument
:t map
-- things can be defined recursively
-- Haskell is lazy
-- A self-referential definition: h is 1 followed by h itself, i.e. an
-- infinite list of ones. Laziness means any finite prefix terminates.
h :: [Int]
h = 1 : h
take 10 h
[1,1,1,1,1,1,1,1,1,1]
-- here's an example using map
-- The tail of g is g with every element incremented, so g enumerates
-- 1, 2, 3, ... lazily.
g :: [Int]
g = 1 : map (+1) g
take 10 g
[1,2,3,4,5,6,7,8,9,10]
-- functions compose
not (not True)
(not . not) True
((> 0) . (+1)) 0
True
True
True
import qualified Diagrams.Prelude as D
import Diagrams.Prelude (V2(..), (#))
import qualified Diagrams.Backend.Cairo as C
:e FlexibleContexts
-- Draw a labelled box with arrows on either side: two tiny circles act
-- as (near-invisible) named anchor ports, and connectOutside draws an
-- arrow between each named port and the labelled box.
boxDiagram t = D.hsep 0.1 [
D.circle 0.001 # D.named "in",
(D.text t # D.scale 0.02 <> D.rect 0.25 0.25) # D.named "box",
D.circle 0.001 # D.named "out"]
# D.connectOutside "out" "box" # D.connectOutside "box" "in"
diagram $ D.hsep 0 [boxDiagram "\tnot::\nBool -> Bool", boxDiagram "\tnot::\nBool -> Bool"]
diagram $ D.hsep 0 [boxDiagram "\t(>0)::\nInt -> Bool", boxDiagram "\t(+1)::\nInt -> Int"]
:t (.)
-- here's an example
-- | True exactly when the argument is divisible by two.
isEven :: Int -> Bool
isEven = even
-- given any list (infinite or otherwise), drop all the odd numbers
-- | Keep only the even elements; works on infinite lists too.
evens :: [Int] -> [Int]
evens xs = [x | x <- xs, isEven x]
-- | The first ten even elements of the input list.
tenEvens :: [Int] -> [Int]
tenEvens xs = take 10 (evens xs)
tenEvens [1..]
[2,4,6,8,10,12,14,16,18,20]
sumOfEvens = foldr1 (+) . take 10 . filter isEven
sumOfEvens [1..]
110
diagram $ D.hsep 0 [
boxDiagram "(foldr1 (+))::\n[Int] -> Int",
boxDiagram "(take 10)::\n[Int] -> [Int]",
boxDiagram "(filter isEven)::\n[Int] -> [Int]"]
:e ImportQualifiedPost
:e FlexibleContexts
:e BlockArguments
:e TupleSections
:e FlexibleContexts
:e OverloadedStrings
:e LambdaCase
:e RankNTypes
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
import Control.Arrow (first)
import Data.Text (pack)
import qualified Data.Text as T
import Control.Monad
import Numeric.Log
import Control.Monad.Loops
import Control.Monad.Bayes.Class
import Control.Monad.Bayes.Enumerator hiding (expectation)
import Control.Monad.Bayes.Weighted
import Control.Monad.Bayes.Sampler
import Control.Monad.Bayes.Integrator
import Control.Monad.Bayes.Population
import Control.Monad.Bayes.Density.Free
import Control.Monad.Bayes.Traced.Static
import Control.Monad.Bayes.Inference.SMC
import qualified Diagrams.Prelude as D
import Diagrams.Prelude (V2(..), (#))
import qualified Diagrams.Backend.Cairo as C
import qualified Pipes.Prelude as P
import qualified Pipes as P
import Pipes (Producer, (>->), MonadTrans (lift))
import Control.Applicative
type Distribution a = forall m . MonadSample m => m a
type Measure a = forall m . MonadInfer m => m a
type Real = Double
type Diagram = D.Diagram C.B
displayDiagram = diagram
-- here is a new function bernoulli, along with a new type Distribution
-- both are from probabilistic programming library monad-bayes
model :: Distribution Bool
model = bernoulli 0.7
-- we can sample from the model using the sampleIO function from monad-bayes
sampleIO model
:t sampleIO model
False
-- we can explore all possible outcomes
enumerate model
[(True,0.7),(False,0.30000000000000004)]
-- and plot it of course
plotVega (first (pack . show) <$> (enumerate model))
-- we can map a function over a distribution (using a variant called fmap)
model :: Distribution Bool
model = fmap not (bernoulli 0.7)
plotVega (first (pack . show) <$> (enumerate model))
-- mapping has non-trivial effects on the distribution if the function is not injective
model :: Distribution Bool
model = fmap (const True) (bernoulli 0.7)
plotVega (first (pack . show) <$> (enumerate model))
-- in fact, we can construct many distributions as functions of the uniform distribution
model :: Distribution Bool
model = fmap (< 0.7) random
sampleIO model
True
model :: Distribution Real
model = normal 0 1
sampleIO model
0.575028314963377
expectation model
2.3140142600862594e-18
plotVega $ histogram 1000 0.01 model
sampleIO $ runWith [0.2] model
(-0.8416212335729143,[0.2])
sampleIO $ prior $ mh 10 $ model
[-2.0085940235888478,-6.697695689073753e-2,4.840119410529751e-2,0.29846792719513393,3.702378712275772e-2,-0.6387938012141859,-0.6357890332968017,-1.5040017530000065,0.7706396345502186,0.37887755061806694,0.5113217567538375]
-- we can hand the result of one distribution to a Markov kernel, and thus construct more complex distributions
-- distributions as programs, hence: probabilistic programming
-- A two-component mixture: with probability 0.2 draw from N(10, 1),
-- otherwise from N(-5, 5).
model :: Distribution Real
model = do
c <- bernoulli 0.2
if c then normal 10 1 else normal (-5) 5
View this as an abstract description of a distribution. It is composed from simpler distributions, and can itself be a piece in larger distributions.
sampleIO model
-5.9899054414630175
expectation model
-1.999999999999914
plotVega $ histogram 250 0.2 model
-- distributions need not be normalized
-- The same mixture, but conditioned on the draw being positive.
-- The result is an unnormalized measure rather than a distribution.
model2 :: Measure Real
model2 = do
c <- bernoulli 0.2
x <- if c then normal 10 1 else normal (-5) 5
condition (x > 0)
return x
plotVega $ histogram 250 0.2 model2
-- in fact, we can add arbitrary factors to the "graph"
-- A wide normal prior reweighted by an arbitrary factor: Exp builds a
-- log-domain weight, so each x is scored by exp(cos x).
model3 :: Measure Real
model3 = do
x <- normal 0 10
factor $ Exp (cos x )
return x
plotVega $ histogram 800 0.1 model3
We can use laziness to specify distributions over infinite lists (and other infinite structures) which we subsequently marginalize to finite distributions, but only as a final step.
import qualified Control.Monad.Bayes.Sampler.Lazy as L
-- all the elements of the support are infinite lists. Yikes!
-- Recursively prepend fresh uniform draws: every sample is an infinite
-- list of independent uniforms, so only lazy sampling can run this.
infiniteListDistribution :: Distribution [Real]
infiniteListDistribution = do
x <- random
fmap (x :) infiniteListDistribution
-- x is an infinite list, drawn at random!
infiniteList <- L.sample infiniteListDistribution
-- take the first four elements
take 4 infiniteList
[0.45183909465222627,0.8074186434579311,0.730350161885299,6.352777832006962e-2]
-- or use fmap to directly obtain a distribution over finite lists
finiteListDistribution :: Distribution [Real]
finiteListDistribution = fmap (take 4) infiniteListDistribution
L.sample finiteListDistribution
[0.9182056070270405,4.200877821885529e-2,0.9633448686258755,0.6540031820992496]
interestingInfiniteListDistribution = do
i1 <- infiniteListDistribution
i2 <- infiniteListDistribution
return (zipWith (+) i1 (tail i2))
L.sample (fmap (take 4) interestingInfiniteListDistribution)
[1.4690753397188483,1.2841891533542422,0.48988832547910144,0.8318796414908216]
-- kernel :: MonadSample m => V2 Real -> Distribution (V2 Real)
-- One step of a 2-D Gaussian random walk: jitter each coordinate
-- independently with standard deviation 0.01.
kernel (V2 x y) = do
  horiz <- normal x 0.01
  vert <- normal y 0.01
  pure (V2 horiz vert)
-- Infinite random walk from the origin: repeatedly apply the Markov
-- kernel; `Just . (\x -> (x,x))` makes unfoldrM both emit each new
-- position and thread it on as the next seed, never terminating.
randomWalk :: Distribution [V2 Real]
randomWalk = unfoldrM (fmap (Just . (\x -> (x,x))) . kernel) 0
x <- L.sample randomWalk
take 4 x
[V2 (-2.8460842765943954e-3) (-1.5754468441311727e-2),V2 9.91502562264479e-3 (-2.6894296308489257e-2),V2 1.6882001536142836e-2 (-2.4855887415329417e-2),V2 5.0496205366172674e-3 (-1.5198674955383825e-2)]
Transforming randomWalk: given any function on lists of 2D vectors, we can use that function to transform randomWalk. As a neat example, let's introduce the type Diagram of diagrams, which are renderable and composable images:
circ = D.circle 1 # D.frame 1 :: Diagram
displayDiagram circ
diagram $ (circ <> circ # D.translateX 1)
simpleDiagramDistribution :: Distribution Diagram
simpleDiagramDistribution = uniformD [D.circle 1, D.rect 1 1]
x <- sampleIO simpleDiagramDistribution
displayDiagram x
Turning randomWalk into a distribution over diagrams: we can now create a more interesting distribution over diagrams by transforming randomWalk.
-- Render a single 2-D point as a tiny circle at that position.
convertPointToDiagram :: V2 Real -> Diagram
convertPointToDiagram point = D.circle 0.001 # D.translate point
-- Superimpose the first n points of a (possibly infinite) list into one
-- diagram, drawing each as a tiny circle.
convertListofNPointsToDiagram :: Int -> [V2 Real] -> Diagram
convertListofNPointsToDiagram n points = foldMap convertPointToDiagram (take n points)
diagramList :: Distribution Diagram
diagramList = fmap (convertListofNPointsToDiagram 10000) randomWalk
d <- L.sample diagramList
displayDiagram d
randomWalk2 = fmap (scanl (+) 0) randomWalk
d <- L.sample $ fmap (convertListofNPointsToDiagram 100) randomWalk2
displayDiagram d
-- randomWalk3 = D.liftA2 (zipWith (+)) randomWalk (tail <$> randomWalk)
-- d <- L.sample $ fmap (convertListofNPointsToDiagram 10000) randomWalk3
-- displayDiagram d
:e BlockArguments
:e LambdaCase
:e OverloadedStrings
import Data.Aeson.Lens
import Control.Lens hiding (nth, (#))
import Data.Aeson
import Data.Maybe (fromMaybe)
import Control.Monad.Bayes.Class
import Control.Monad.Bayes.Sampler
import Data.Monoid
import qualified Data.ByteString.Lazy as B
import Text.Pretty.Simple
json <- fromMaybe undefined . decode <$> B.readFile "file.json" :: IO Value
pPrint json
Object
( fromList
[
( "address"
, Object
( fromList
[
( "id"
, Number 5.4
)
,
( "streetAddress"
, String "21 2nd Street"
)
]
)
)
,
( "age"
, Number 27.0
)
,
( "height"
, Number 1.5
)
,
( "isAlive"
, Bool True
)
,
( "name"
, String "John"
)
]
)
-- noisifyString :: T.Text -> Distribution T.Text
-- Corrupt a Text value: each character is independently replaced by a
-- uniformly random lowercase letter with probability 0.2.
noisifyString t = T.pack <$> traverse perturb (T.unpack t)
  where
    perturb letter = do
      corrupt <- bernoulli 0.2
      if corrupt then uniformD "abcdefghijklmnopqrstuvwxyz" else pure letter
-- A distribution over JSON documents obtained by noisifying `json`:
-- numbers get Gaussian jitter, booleans are redrawn from bernoulli 0.9,
-- and strings are perturbed character-by-character.
jsonDist :: Distribution Value
jsonDist = do
  noisyNumbers <- (transformM . _Double) (\d -> normal d 0.001) json
  noisyBools <- (transformM . _Bool) (const (bernoulli 0.9)) noisyNumbers
  (transformM . _String) noisifyString noisyBools
x <- sampleIO jsonDist
pPrint x
Object
( fromList
[
( "address"
, Object
( fromList
[
( "id"
, Number 5.399309290336649524988388293422758579254150390625
)
,
( "streetAddress"
, String "21 rnk Sqreet"
)
]
)
)
,
( "age"
, Number 27.001077265144541428298907703720033168792724609375
)
,
( "height"
, Number 1.50019777377331475776145452982746064662933349609375
)
,
( "isAlive"
, Bool True
)
,
( "name"
, String "iohn"
)
]
)
See demo
An obvious application of probabilistic programming is to describe statistical models and perform Bayesian inference. Here are two typical cases.
import Control.Arrow (second)
import Control.Monad.Bayes.Inference.PMMH
import Control.Monad.Bayes.Inference.RMSMC
-- Prior over the four regression parameters, drawn independently.
paramPrior =
  (,,,)
    <$> normal 0 2    -- slope
    <*> normal 0 2    -- intercept
    <*> gamma 7 7     -- observation noise
    <*> uniform 0 0.5 -- probability that a point is an outlier
-- Forward model for one x: flip an outlier coin, then report the mean
-- and standard deviation of y (a fixed wide component for outliers, the
-- regression line otherwise) together with the outlier flag.
forward (slope, intercept, noise, probOutlier) x = do
  outlier <- bernoulli probOutlier
  let distParams
        | outlier = (0, 20)
        | otherwise = (x*slope + intercept, sqrt noise)
  return (distParams, outlier)
-- Generative model: sample the parameters once, then for every input x
-- sample a (possibly outlying) y. Yields ((x, y), isOutlier) per point.
regressionWithOutliersData :: (MonadSample m, Traversable t) => t Double -> m (t ((Double, Double), Bool))
regressionWithOutliersData xs = do
  ps <- paramPrior
  let samplePoint x = do
        ((mu, std), outlier) <- forward ps x
        y <- normal mu std
        return ((x, y), outlier)
  traverse samplePoint xs
range = [-10,-9.9..10] :: [Double]
samples <- sampleIOfixed $ regressionWithOutliersData range
plotVega (fmap (second (T.pack . show)) samples)
-- Score the given parameters against observed (x, y) pairs: each point
-- is weighted by its Gaussian likelihood under the forward model, and
-- the per-point outlier guesses are returned alongside the parameters.
regressionWithOutliers xs ys params = do
  let scorePoint (x, y) = do
        ((mu, std), outlier) <- forward params x
        factor $ normalPdf mu std y
        return outlier
  flags <- traverse scorePoint (zip xs ys)
  return (params, flags)
mhRuns <- sampleIOfixed $ prior $ pmmh 10 200 100
paramPrior
(regressionWithOutliers range (snd . fst <$> samples))
m = fmap (\((_,a), b) -> (a,b)) $ head mhRuns
-- Per-position posterior outlier probability. s is a list of weighted
-- samples, each a pair of (outlier flags per position lb, weight d).
-- The foldr zips each sample's flags against a running list of
-- (outlier mass, inlier mass) accumulators in log space (num1/num2),
-- adding the sample's weight to whichever side its flag indicates.
-- Finally x/(x+y) is the outlier fraction as a Log Double, and
-- ln . exp extracts its linear-domain value as a plain Double.
outlierProb s = (\(x, y) -> ln (exp (x / (x+y))) )
<$> (foldr
\(lb, d) li ->
[ if b then (num1+d, num2) else (num1,num2+d) | (b,(num1, num2)) <- zip lb li])
(Prelude.repeat (0 :: Log Double,0 :: Log Double)) s
plotVega $ take 1000 (zip (fst <$> samples) (outlierProb m))
As the above plot shows, this works nicely: the slope, intercept, noise and prob_outlier variables are inferred by a random walk through the space, while the score to determine whether to accept a new proposed step in this walk is determined by a particle filter which guesses which points are outliers after each observation.
import Control.Applicative
import qualified Data.Text as T
import Pipes (Producer, (>->))
import qualified Pipes as P
import Pipes.Prelude (unfoldr)
import qualified Pipes.Prelude as P
import Data.Ord
import Data.List
import Control.Monad
import Control.Arrow (first)
variance = 1
-- how to move from one latent state to the next
-- One step of the latent process: drift both coordinates by +0.5 and
-- add Gaussian noise with the globally configured variance.
latentTransition :: MonadSample m => (Double, Double) -> m (Double, Double)
latentTransition (x, y) = do
  nextX <- normal (x + 0.5) variance
  nextY <- normal (y + 0.5) variance
  pure (nextX, nextY)
-- a Markovian random walk starting at (0,0), with latentTransition as the kernel
-- a Producer is an infinite stream of values
-- An infinite stream (Producer) of positions: start at (0,0) and keep
-- stepping with latentTransition. `Right <$> ...` means the unfold
-- never signals termination; each new state is both emitted and
-- threaded on as the next seed.
walk :: MonadSample m => Producer (Double,Double) m r
walk = flip P.unfoldr (0,0) $ \s ->
Right <$> do
new <- latentTransition s
return (new, new)
-- convert the stream to a list, taking only the first 100 steps
-- | Collect the first 100 items of a Producer into a list.
-- Uses P.toListM, which builds the list in linear time; the previous
-- fold appended to the accumulator with `x <> [y]` on every element,
-- which is quadratic in the number of items.
toList :: Monad m => P.Producer a m () -> m [a]
toList prod = P.toListM (prod >-> P.take 100)
-- generate a stream of observations by sampling a point noisily for each step in the walk
-- Observation model: for each latent position (x, y) emit a noisy
-- measurement (x', y') with standard deviation 2, keeping the true
-- position alongside as (x, y, x', y').
observations = walk >-> P.mapM (\(x,y) ->
do
x' <- normal x 2
y' <- normal y 2
return (x, y, x', y'))
(xs, ys, xs', ys') <- sampleIOfixed $ unzip4 <$> toList observations
plotVega
(zip (zip (xs <> xs') (ys <> ys'))
(replicate 100 (T.pack "Latent") <> replicate 100 (T.pack "Observed")))
-- take the original random walk as a prior and condition on the observations
-- to obtain a posterior random walk
-- Posterior random walk: pair each step of the prior walk with the
-- corresponding observation (xs', ys' bound earlier in the session),
-- score it with a Gaussian likelihood around the observed point, then
-- keep only the latent position.
conditioning :: (MonadSample m, MonadCond m) => P.Producer (Double,Double) m ()
conditioning =
P.zip walk (P.each (zip xs' ys'))
>-> P.chain (\((x,y), (x',y')) -> factor (normalPdf x variance x' * normalPdf y variance y' ))
>-> P.map fst
-- Marginalize the conditioned stream into a list of the first 100 positions.
model :: MonadInfer m => m [(Double,Double)]
model = toList conditioning
smcRes <- sampleIOfixed $ runPopulation $ smcMultinomial 100 5000 model
import Control.Arrow
import Data.List (sortOn, nubBy)
import Data.Function (on)
((infX, infY), strs) = second join $ first (unzip . join) $ unzip $ fmap (second (replicate 100 . (T.pack . show))) smcRes
plotVega (zip (zip (xs <> infX) (ys <> infY))
(replicate 100 (T.pack "True") <> take 10000 strs))